Applying the multiscale analysis to real data:
Here is the region we are interested in.
## Visualize coarse data
dat = read.csv(file.path(datadir, filenames[1+4]))
dat = dat %>% select(lat, lon, month, Chl)
mydat = dat[which(dat[,"month"]==12),]
colfun = colorRampPalette(c("blue", "red"))
## colfun = colorRampPalette(c(rgb(0,0,1,0.1), rgb(1,0,0,0.1)))
newmap <- getMap()
map("world", fill=TRUE, col="white", bg="white", ylim=c(-90, 90), mar=c(0,0,0,0))
par(mar=c(0,0,0,0))
## plot(newmap)##, xlim = c(-20, 59), ylim = c(35, 71), asp = 1)
centers = sample(mydat$Chl, 5)
points(mydat[ ,c("lon", "lat")], pch=15, cex=4,
col=colfun(20)[as.numeric(cut(mydat$Chl,breaks = 20))])
map("world", fill=TRUE, col="white", bg="white", ylim=c(-90, 90), mar=c(0,0,0,0), add=TRUE)
rect(lonrange[1], latrange[1], lonrange[2], latrange[2],
border = c("yellow"), lwd=3)
Load and clean data.
## Retrieve the finest one (1/2 degree resolution)
if(FALSE){
darwin_dat = read.csv(file.path(datadir, filenames[4]))
real_dat = read.csv(file.path(datadir, filenames[4+4]))
darwin_dat = darwin_dat %>% select(lat, lon, month, Chl)
real_dat = real_dat %>% select(lat, lon, month, Chl)
head(real_dat)
## ## This might be better
darwin_dat = fread(file.path(datadir, filenames[2]), select=c("lat", "lon", "month", "Chl"))
real_dat = fread(file.path(datadir, filenames[2+4]), select=c("lat", "lon", "month", "Chl"))
darwin_dat %>% object.size() %>% format("Mb")
real_dat %>% object.size() %>% format("Mb")
saveRDS(darwin_dat, file=file.path(datadir, "darwin_chl_finest_resolution.RDS"))
saveRDS(real_dat, file=file.path(datadir, "real_chl_finest_resolution.RDS"))
## Load it
darwin_dat = readRDS(file=file.path(datadir, "darwin_chl_finest_resolution.RDS"))
real_dat = readRDS(file=file.path(datadir, "real_chl_finest_resolution.RDS"))
}
darwin_dat = fread(file.path(datadir, filenames[2]), select=c("lat", "lon", "month", "Chl"))
real_dat = fread(file.path(datadir, filenames[2+4]), select=c("lat", "lon", "month", "Chl"))
## Focus on yellow box in the North Pacific, and interpolate
real.jan.mat = real_dat %>% subset(month==1) %>% filter(lonrange[1] < lon,
lon < lonrange[2],
latrange[1] < lat,
lat < latrange[2]) %>%
make_mat() %>% fill_in_empty(2)
darwin.jan.mat = darwin_dat %>% subset(month==1) %>% filter(lonrange[1] < lon,
lon < lonrange[2],
latrange[1] < lat,
lat < latrange[2]) %>%
make_mat() %>% fill_in_empty(2)
Now, the January data at 2 degree resolution (no coarsening), from the two data sources, look like this.
## Plot the two together
gridExtra::grid.arrange(drawmat_precise(real.jan.mat, main = "Jan real"),
drawmat_precise(darwin.jan.mat, main = "Darwin real"),
ncol=2)
And here is the data after coarsening by non-overlapping bins:
## Calculate the OMD in this region
for(bin_size in 1:28){
gridExtra::grid.arrange(real.jan.mat %>% smoothmat(bin_size) %>% drawmat_precise( main = "Jan real"),
darwin.jan.mat %>% smoothmat(bin_size) %>% drawmat_precise(main = "Darwin real"),
ncol = 2,
top = textGrob(paste0("Bin size ", bin_size), gp = gpar(fontsize=20, font=2)))
}
Now, the calculate OMD:
objlist = list()
for(bin_size in 1:28){
obj = omd(real.jan.mat %>% smoothmat(bin_size),
darwin.jan.mat %>% smoothmat(bin_size))
objlist[[bin_size]] = obj
}
## Also calculate the pixel-wise differences
l2dists = c()
l2 <- function(m1, m2){mean((m1-m2)^2)}
for(bin_size in 1:28){
l2dists[bin_size] = l2(real.jan.mat %>% smoothmat(bin_size),
darwin.jan.mat %>% smoothmat(bin_size))
}
## Also calculate pixel-wise differences as images
pixel_wise_diffs = list()
for(bin_size in 1:28){
pixel_wise_diffs[[bin_size]] = (real.jan.mat %>% smoothmat(bin_size) - darwin.jan.mat %>% smoothmat(bin_size))
}
Then, make the mass transport plots:
## Make the plots
for(bin_size in 1:28){ plot_omd(objlist[[bin_size]]) }
Also visualize the pixel-wise differences as images:
plots = lapply(1:28, function(ii){
px = pixel_wise_diffs[[ii]]
px %>% drawmat_precise(main=paste0("Bin size ", ii))
})
do.call("grid.arrange", c(plots, ncol=3))
Also, visualize the two distances – OMD and L2 distances – over resolution:
omds = sapply(objlist, function(a)a$dist)
par(mfrow=c(1, 2))
plot(omds, type='o', lwd=2, main = "OMD", xlab = "Bin size", ylab = "OMD")
abline(v = seq(from=0, to=28, by=1), col=rgb(0,0,0,0.2), lwd=2, lty=3)
plot(l2dists, type='o', main="Pixel-wise L2 dist", xlab = "Bin size", lwd=2, lty=3, ylab = "L2 distance")
abline(v = seq(from=0, to=28, by=1), col=rgb(0,0,0,0.2), lwd=2, lty=3)